In [ ]:
import matplotlib.pyplot as plt
import numpy as np
# IPython magic (not plain Python): render matplotlib figures inline in the
# notebook output instead of opening a separate window.
%matplotlib inline
Predictions in a kernel SVM are made using the formula
$$ \hat{y} = \alpha_1 y_1 k(\mathbf{x}^{(1)}, \mathbf{x}) + \dots + \alpha_n y_n k(\mathbf{x}^{(n)}, \mathbf{x}) > 0 $$

$$ 0 \leq \alpha_i \leq C $$

Radial basis function (Gaussian) kernel: $$k(\mathbf{x}, \mathbf{x'}) = \exp(-\gamma \|\mathbf{x} - \mathbf{x'}\|^2)$$
Kernel approximation $\phi$ (an explicit feature map whose inner product approximates the kernel): $$\phi(\mathbf{x})^\top \phi(\mathbf{x'}) \approx k(\mathbf{x}, \mathbf{x'})$$
In [ ]:
from helpers import Timer
from sklearn.datasets import load_digits
try:
    # scikit-learn >= 0.18: the splitting utilities live in model_selection.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn: the module was removed in 0.20, so this is only a
    # fallback for legacy installations.
    from sklearn.cross_validation import train_test_split

# Load the digits dataset and rescale the features by the constant 16
# (per the scikit-learn docs the pixel intensities are integers in 0..16,
# so this maps them into [0, 1]).
digits = load_digits()
X, y = digits.data / 16., digits.target

# Hold out a test set; random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
In [ ]:
from sklearn.svm import LinearSVC
try:
    # scikit-learn >= 0.18: GridSearchCV lives in model_selection.
    from sklearn.model_selection import GridSearchCV
except ImportError:
    # Older scikit-learn: sklearn.grid_search was removed in 0.20, so this is
    # only a fallback for legacy installations.
    from sklearn.grid_search import GridSearchCV

# Baseline: a linear SVM, tuning C over six log-spaced values (1e-3 .. 1e2)
# with 5-fold cross-validation.
grid = GridSearchCV(LinearSVC(random_state=0),
                    param_grid={'C': np.logspace(-3, 2, 6)}, cv=5)

# Timer comes from the local `helpers` module; presumably it reports how long
# the enclosed block took -- confirm against helpers.py.
with Timer():
    grid.fit(X_train, y_train)

# Score of the refit best estimator on the held-out test split.
grid.score(X_test, y_test)
In [ ]:
from sklearn.svm import SVC
try:
    # scikit-learn >= 0.18: GridSearchCV lives in model_selection.
    from sklearn.model_selection import GridSearchCV
except ImportError:
    # Older scikit-learn: sklearn.grid_search was removed in 0.20, so this is
    # only a fallback for legacy installations.
    from sklearn.grid_search import GridSearchCV

# Kernel SVM with SVC's default kernel: search jointly over the
# regularisation strength C and the kernel width gamma (6 x 6 log-spaced
# candidates, 5-fold cross-validation).
grid = GridSearchCV(SVC(), param_grid={'C': np.logspace(-3, 2, 6),
                                       'gamma': np.logspace(-3, 2, 6)}, cv=5)

# Timer comes from the local `helpers` module; presumably it reports how long
# the enclosed block took -- confirm against helpers.py.
with Timer():
    grid.fit(X_train, y_train)

# Score of the refit best estimator on the held-out test split.
grid.score(X_test, y_test)
In [ ]:
from sklearn.kernel_approximation import RBFSampler
from sklearn.pipeline import make_pipeline

# Approximate-kernel alternative: map the inputs through RBFSampler and train
# a linear SVM on the transformed features.
pipe = make_pipeline(
    RBFSampler(random_state=0),
    LinearSVC(dual=False, random_state=0),
)

# make_pipeline names each step after its lower-cased class name, hence the
# `<step>__<param>` keys below.
search_space = {
    'linearsvc__C': np.logspace(-3, 2, 6),
    'rbfsampler__gamma': np.logspace(-3, 2, 6),
}
grid = GridSearchCV(pipe, param_grid=search_space, cv=5)

# Time the full 6 x 6 grid search with 5-fold cross-validation.
with Timer():
    grid.fit(X_train, y_train)

# Held-out score of the best pipeline found.
grid.score(X_test, y_test)
In [ ]:
try:
    import cPickle  # Python 2
except ImportError:
    # Python 3 has no cPickle module; keep the same name bound so the other
    # cells that call cPickle.load keep working.
    import pickle as cPickle
from sklearn.linear_model import SGDClassifier

# Out-of-core training: stream the pickled batches 00..08 from disk and update
# a linear model incrementally, making 30 passes over the batch files.
# NOTE: unpickling can execute arbitrary code -- only load batch files from a
# trusted source.
sgd = SGDClassifier(random_state=0)

# partial_fit is told the full label set because a single batch is not
# guaranteed to contain every class. Built once instead of once per call.
classes = list(range(10))

for iteration in range(30):
    for i in range(9):
        # Pickle data is binary: open with "rb" and close the handle promptly
        # instead of leaking one per batch read.
        with open("data/batch_%02d.pickle" % i, "rb") as batch_file:
            X_batch, y_batch = cPickle.load(batch_file)
        sgd.partial_fit(X_batch, y_batch, classes=classes)
In [ ]:
# Batch 09 is not among the batches used for training (00..08), so it serves
# as the held-out test set. Open in binary mode and close the file
# deterministically.
with open("data/batch_09.pickle", "rb") as test_file:
    X_test, y_test = cPickle.load(test_file)

# Score of the incrementally trained linear model on the held-out batch.
sgd.score(X_test, y_test)
In [ ]:
# Same out-of-core loop as before, but each batch is first mapped through an
# RBFSampler so the linear SGD model is trained on the transformed features.
sgd = SGDClassifier(random_state=0)

# The sampler is fitted on a dummy 1 x 64 row of ones: presumably fit() only
# needs the number of input features to draw its random projection -- confirm
# against the RBFSampler documentation.
rbf_sampler = RBFSampler(gamma=.2, random_state=0).fit(np.ones((1, 64)))

# partial_fit is told the full label set because a single batch is not
# guaranteed to contain every class. Built once instead of once per call.
classes = list(range(10))

for iteration in range(30):
    for i in range(9):
        # Pickle data is binary: open with "rb" and close the handle promptly
        # instead of leaking one per batch read.
        with open("data/batch_%02d.pickle" % i, "rb") as batch_file:
            X_batch, y_batch = cPickle.load(batch_file)
        X_kernel = rbf_sampler.transform(X_batch)
        sgd.partial_fit(X_kernel, y_batch, classes=classes)
In [ ]:
# The test batch must go through the same fitted sampler that was used during
# training before it can be scored.
X_test_kernel = rbf_sampler.transform(X_test)
sgd.score(X_test_kernel, y_test)